# Load the table of normalized pressures and health indicators from the
# project's data directory (path resolved relative to the project root by here()).
sum_4_normalized_pressures_health_selective <- read_csv(
  here("data/sum_4_normalized_pressures_health_selective.csv")
)
## Rows: 3133 Columns: 65
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): fips, state, county
## dbl (62): sum_1_disturbance, sum_1_water, sum_1_ghg, sum_1_nutrient, sum_4_c...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Six states with the highest cumulative pressure from food production
# One row per state: summed sum_4_cumulative, median per_rural, median of
# median_household_income, and summed population across that state's rows.
state_data <- summarise(
  group_by(sum_4_normalized_pressures_health_selective, state),
  cum_pressure = sum(sum_4_cumulative),
  median_rural = median(per_rural),
  median_state_income = median(median_household_income),
  state_pop = sum(population)
)
# Keep only the rows belonging to the six states with the highest cumulative
# pressure. The state names are hard-coded here rather than derived from
# state_data (presumably read off that summary -- verify if the data change).
# "Illinois" was previously misspelled, which silently excluded every Illinois
# row from the subset; %in% keeps the list in one place and never yields NA.
sum_4_normalized_pressures_health_selective_top_states <-
  sum_4_normalized_pressures_health_selective %>%
  filter(
    state %in% c(
      "Iowa", "Illinois", "Nebraska", "Indiana", "Minnesota", "Kansas"
    )
  )
# Median of sum_4_cumulative: top-state subset, then the full table.
median(sum_4_normalized_pressures_health_selective_top_states[["sum_4_cumulative"]])
## [1] 0.003039106
median(sum_4_normalized_pressures_health_selective[["sum_4_cumulative"]])
## [1] 0.0007026671
# Maximum of sum_4_cumulative: top-state subset, then the full table.
max(sum_4_normalized_pressures_health_selective_top_states[["sum_4_cumulative"]])
## [1] 0.009332784
max(sum_4_normalized_pressures_health_selective[["sum_4_cumulative"]])
## [1] 0.009332784
# Minimum of sum_4_cumulative within the top-state subset.
min(sum_4_normalized_pressures_health_selective_top_states[["sum_4_cumulative"]])
## [1] 0.000003166402
# Drop every row with a missing value in any model response or predictor.
# All six responses are listed, including per_freq_physical_distress, because
# randomForest's formula interface defaults to na.action = na.fail and would
# stop on an NA in a response. Each column is listed exactly once.
sum_4_normalized_pressures_health_selective_no_na_top_states <-
  sum_4_normalized_pressures_health_selective_top_states %>%
  drop_na(
    # responses
    per_fair_poor_health, life_expectancy, per_low_birthweight,
    per_freq_mental_distress, per_freq_physical_distress, per_adult_obesity,
    # predictors
    per_rural, median_household_income, population,
    per_access_to_exercise, per_uninsured, primary_care_phys_quartile,
    mental_health_providers_quartile, per_child_poverty,
    air_pollution_avg_daily_pm2.5, drinking_water_violation_quartile,
    per_severe_house_cost_burden, per_severe_housing_problems,
    per_food_insecure, sum_4_cumulative, per_limited_access_healthy_food,
    per_black, per_asian, per_am_indian_alaska_native,
    per_nativeHA_other_pacific_isl, per_hispanic, per_smokers,
    per_physically_inactive, per_excessive_drinking, per_flu_vaccinated,
    per_completed_hs, per_some_college, traffic_volume,
    per_broadband_access
  )
# Random forest with per_fair_poor_health as the response and 28 predictors,
# fit on the NA-filtered top-states table; importance = TRUE stores the
# variable-importance measures. The fitted model summary is then printed.
# NOTE(review): no set.seed() is visible in this section, so results may vary
# between runs -- confirm whether a seed is set earlier in the document.
rf_per_fair_poor_health_top_states <- randomForest(
  per_fair_poor_health ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)
print(rf_per_fair_poor_health_top_states)
##
## Call:
## randomForest(formula = per_fair_poor_health ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 0.8094195
## % Var explained: 90.86
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_per_fair_poor_health_top_states, type = 2))
## IncNodePurity
## per_rural 28.786756
## median_household_income 105.061185
## population 22.455681
## per_access_to_exercise 20.879370
## per_uninsured 281.138617
## primary_care_phys_quartile 8.780831
## mental_health_providers_quartile 6.933924
## per_child_poverty 136.595556
## air_pollution_avg_daily_pm2.5 63.056478
## drinking_water_violation_quartile 3.926460
## per_severe_house_cost_burden 13.720881
## per_severe_housing_problems 33.272707
## per_food_insecure 161.129332
## sum_4_cumulative 22.752731
## per_limited_access_healthy_food 17.183330
## per_black 21.355248
## per_asian 17.605994
## per_am_indian_alaska_native 41.978807
## per_nativeHA_other_pacific_isl 6.096772
## per_hispanic 131.882880
## per_smokers 571.564248
## per_physically_inactive 38.496805
## per_excessive_drinking 1022.186738
## per_flu_vaccinated 16.855264
## per_completed_hs 506.712898
## per_some_college 576.266628
## traffic_volume 20.863773
## per_broadband_access 43.284037
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_per_fair_poor_health_top_states, type = 2)
# Random forest with life_expectancy as the response and the same 28
# predictors, fit on the NA-filtered top-states table; the model summary is
# printed afterwards.
rf_life_expectancy_top_states <- randomForest(
  life_expectancy ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)
print(rf_life_expectancy_top_states)
##
## Call:
## randomForest(formula = life_expectancy ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 2.830156
## % Var explained: 48.59
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_life_expectancy_top_states, type = 2))
## IncNodePurity
## per_rural 28.84049
## median_household_income 133.88460
## population 65.14073
## per_access_to_exercise 55.57058
## per_uninsured 65.35269
## primary_care_phys_quartile 15.72920
## mental_health_providers_quartile 16.10401
## per_child_poverty 305.90368
## air_pollution_avg_daily_pm2.5 83.41265
## drinking_water_violation_quartile 8.58231
## per_severe_house_cost_burden 39.62646
## per_severe_housing_problems 47.36174
## per_food_insecure 197.55617
## sum_4_cumulative 54.57015
## per_limited_access_healthy_food 54.43319
## per_black 38.89374
## per_asian 43.92771
## per_am_indian_alaska_native 75.88630
## per_nativeHA_other_pacific_isl 11.27913
## per_hispanic 47.47724
## per_smokers 280.21599
## per_physically_inactive 123.81523
## per_excessive_drinking 178.11039
## per_flu_vaccinated 64.67701
## per_completed_hs 118.16497
## per_some_college 85.65338
## traffic_volume 49.69682
## per_broadband_access 89.22437
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_life_expectancy_top_states, type = 2)
# Random forest with per_low_birthweight as the response and the same 28
# predictors, fit on the NA-filtered top-states table; the model summary is
# printed afterwards.
rf_per_low_birthweight_top_states <- randomForest(
  per_low_birthweight ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)
print(rf_per_low_birthweight_top_states)
##
## Call:
## randomForest(formula = per_low_birthweight ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 1.061943
## % Var explained: 29.64
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_per_low_birthweight_top_states, type = 2))
## IncNodePurity
## per_rural 17.738903
## median_household_income 27.398507
## population 43.463242
## per_access_to_exercise 22.852275
## per_uninsured 25.638052
## primary_care_phys_quartile 5.858828
## mental_health_providers_quartile 9.583828
## per_child_poverty 24.669704
## air_pollution_avg_daily_pm2.5 74.695058
## drinking_water_violation_quartile 4.486703
## per_severe_house_cost_burden 12.339381
## per_severe_housing_problems 13.682084
## per_food_insecure 32.244399
## sum_4_cumulative 30.188080
## per_limited_access_healthy_food 16.750043
## per_black 25.846735
## per_asian 15.065219
## per_am_indian_alaska_native 18.952179
## per_nativeHA_other_pacific_isl 5.470050
## per_hispanic 20.926135
## per_smokers 23.381296
## per_physically_inactive 33.771432
## per_excessive_drinking 30.302415
## per_flu_vaccinated 17.746845
## per_completed_hs 16.206792
## per_some_college 21.767651
## traffic_volume 41.027023
## per_broadband_access 16.090605
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_per_low_birthweight_top_states, type = 2)
# Random forest with per_freq_mental_distress as the response and the same 28
# predictors, fit on the NA-filtered top-states table; the model summary is
# printed afterwards.
rf_per_freq_mental_distress_top_states <- randomForest(
  per_freq_mental_distress ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)
print(rf_per_freq_mental_distress_top_states)
##
## Call:
## randomForest(formula = per_freq_mental_distress ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 0.2621599
## % Var explained: 91.95
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_per_freq_mental_distress_top_states, type = 2))
## IncNodePurity
## per_rural 5.689955
## median_household_income 36.287496
## population 10.373301
## per_access_to_exercise 10.064708
## per_uninsured 82.063035
## primary_care_phys_quartile 1.724042
## mental_health_providers_quartile 1.249132
## per_child_poverty 24.235228
## air_pollution_avg_daily_pm2.5 118.103790
## drinking_water_violation_quartile 1.498116
## per_severe_house_cost_burden 4.798502
## per_severe_housing_problems 6.509386
## per_food_insecure 125.687325
## sum_4_cumulative 15.577046
## per_limited_access_healthy_food 5.260310
## per_black 5.134872
## per_asian 9.130129
## per_am_indian_alaska_native 15.149269
## per_nativeHA_other_pacific_isl 2.860719
## per_hispanic 9.286602
## per_smokers 367.117895
## per_physically_inactive 16.617621
## per_excessive_drinking 434.437089
## per_flu_vaccinated 6.927124
## per_completed_hs 33.953933
## per_some_college 66.286494
## traffic_volume 5.918396
## per_broadband_access 21.274809
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_per_freq_mental_distress_top_states, type = 2)
# Random forest with per_freq_physical_distress as the response and the same
# 28 predictors, fit on the NA-filtered top-states table; the model summary is
# printed afterwards.
rf_per_freq_physical_distress_top_states <- randomForest(
  per_freq_physical_distress ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)
print(rf_per_freq_physical_distress_top_states)
##
## Call:
## randomForest(formula = per_freq_physical_distress ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 0.282749
## % Var explained: 88.65
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_per_freq_physical_distress_top_states, type = 2))
## IncNodePurity
## per_rural 4.836674
## median_household_income 34.355501
## population 6.066634
## per_access_to_exercise 5.449328
## per_uninsured 71.122487
## primary_care_phys_quartile 2.252014
## mental_health_providers_quartile 1.440958
## per_child_poverty 43.193670
## air_pollution_avg_daily_pm2.5 27.394455
## drinking_water_violation_quartile 1.161147
## per_severe_house_cost_burden 4.451435
## per_severe_housing_problems 10.383857
## per_food_insecure 60.300711
## sum_4_cumulative 9.956034
## per_limited_access_healthy_food 6.323209
## per_black 4.039418
## per_asian 6.563057
## per_am_indian_alaska_native 19.773587
## per_nativeHA_other_pacific_isl 1.827321
## per_hispanic 11.422503
## per_smokers 303.248499
## per_physically_inactive 10.665358
## per_excessive_drinking 263.721695
## per_flu_vaccinated 6.092631
## per_completed_hs 53.067135
## per_some_college 115.354058
## traffic_volume 4.549182
## per_broadband_access 17.400827
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_per_freq_physical_distress_top_states, type = 2)
# Random forest with per_adult_obesity as the response and the same 28
# predictors, fit on the NA-filtered top-states table; the model summary is
# printed afterwards.
# NOTE(review): the object name spells "obesisty"; it is kept as-is because
# other code may refer to it.
rf_per_adult_obesisty_top_states <- randomForest(
  per_adult_obesity ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_states,
  importance = TRUE
)
print(rf_per_adult_obesisty_top_states)
##
## Call:
## randomForest(formula = per_adult_obesity ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_states, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 11.74257
## % Var explained: 26.67
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_per_adult_obesisty_top_states, type = 2))
## IncNodePurity
## per_rural 146.61501
## median_household_income 250.76292
## population 362.04415
## per_access_to_exercise 258.15551
## per_uninsured 144.63068
## primary_care_phys_quartile 131.35205
## mental_health_providers_quartile 79.65116
## per_child_poverty 266.87808
## air_pollution_avg_daily_pm2.5 277.33960
## drinking_water_violation_quartile 46.08876
## per_severe_house_cost_burden 141.36550
## per_severe_housing_problems 190.75389
## per_food_insecure 163.36858
## sum_4_cumulative 421.20268
## per_limited_access_healthy_food 166.56377
## per_black 160.72526
## per_asian 288.91127
## per_am_indian_alaska_native 199.21364
## per_nativeHA_other_pacific_isl 98.38842
## per_hispanic 264.40879
## per_smokers 169.74197
## per_physically_inactive 910.00567
## per_excessive_drinking 168.58854
## per_flu_vaccinated 199.30157
## per_completed_hs 242.59343
## per_some_college 549.96623
## traffic_volume 220.38974
## per_broadband_access 378.84904
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_per_adult_obesisty_top_states, type = 2)
Country Level (all counties)
# Drop every row of the full table with a missing value in any model response
# or predictor. All responses are listed, including
# per_freq_physical_distress, because randomForest's formula interface
# defaults to na.action = na.fail and would stop on an NA in a response.
# Each column is listed exactly once.
sum_4_normalized_pressures_health_selective_no_na_health_all_counties <-
  sum_4_normalized_pressures_health_selective %>%
  drop_na(
    # responses
    per_fair_poor_health, life_expectancy, per_low_birthweight,
    per_freq_mental_distress, per_freq_physical_distress, per_adult_obesity,
    # predictors
    per_rural, median_household_income, population,
    per_access_to_exercise, per_uninsured, primary_care_phys_quartile,
    mental_health_providers_quartile, per_child_poverty,
    air_pollution_avg_daily_pm2.5, drinking_water_violation_quartile,
    per_severe_house_cost_burden, per_severe_housing_problems,
    per_food_insecure, sum_4_cumulative, per_limited_access_healthy_food,
    per_black, per_asian, per_am_indian_alaska_native,
    per_nativeHA_other_pacific_isl, per_hispanic, per_smokers,
    per_physically_inactive, per_excessive_drinking, per_flu_vaccinated,
    per_completed_hs, per_some_college, traffic_volume,
    per_broadband_access
  )
# Random forest with per_fair_poor_health as the response and 28 predictors,
# fit on the NA-filtered table of all counties; importance = TRUE stores the
# variable-importance measures. The fitted model summary is then printed.
rf_per_fair_poor_health_all_counties <- randomForest(
  per_fair_poor_health ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)
print(rf_per_fair_poor_health_all_counties)
##
## Call:
## randomForest(formula = per_fair_poor_health ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 1.40382
## % Var explained: 94.55
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_per_fair_poor_health_all_counties, type = 2))
## IncNodePurity
## per_rural 256.70763
## median_household_income 7946.51901
## population 342.49930
## per_access_to_exercise 327.56183
## per_uninsured 606.86633
## primary_care_phys_quartile 69.32211
## mental_health_providers_quartile 69.65844
## per_child_poverty 9146.88505
## air_pollution_avg_daily_pm2.5 519.55709
## drinking_water_violation_quartile 49.51777
## per_severe_house_cost_burden 160.55338
## per_severe_housing_problems 273.93878
## per_food_insecure 16556.98117
## sum_4_cumulative 368.82763
## per_limited_access_healthy_food 205.98351
## per_black 528.14768
## per_asian 205.06834
## per_am_indian_alaska_native 290.18232
## per_nativeHA_other_pacific_isl 92.03013
## per_hispanic 1289.28610
## per_smokers 3376.88245
## per_physically_inactive 681.15287
## per_excessive_drinking 3904.60062
## per_flu_vaccinated 297.89889
## per_completed_hs 21524.75245
## per_some_college 5405.13556
## traffic_volume 239.93625
## per_broadband_access 1712.62939
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_per_fair_poor_health_all_counties, type = 2)
# Random forest with life_expectancy as the response and the same 28
# predictors, fit on the NA-filtered table of all counties; the model summary
# is printed afterwards.
rf_life_expectancy_all_counties <- randomForest(
  life_expectancy ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)
print(rf_life_expectancy_all_counties)
##
## Call:
## randomForest(formula = life_expectancy ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 2.841018
## % Var explained: 68.04
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_life_expectancy_all_counties, type = 2))
## IncNodePurity
## per_rural 370.47077
## median_household_income 2177.33275
## population 553.57872
## per_access_to_exercise 428.64344
## per_uninsured 376.65377
## primary_care_phys_quartile 103.21559
## mental_health_providers_quartile 123.04834
## per_child_poverty 2047.38047
## air_pollution_avg_daily_pm2.5 754.69055
## drinking_water_violation_quartile 59.91546
## per_severe_house_cost_burden 360.74254
## per_severe_housing_problems 440.84633
## per_food_insecure 4108.88469
## sum_4_cumulative 525.90190
## per_limited_access_healthy_food 473.15581
## per_black 458.94091
## per_asian 323.79845
## per_am_indian_alaska_native 518.05550
## per_nativeHA_other_pacific_isl 93.81991
## per_hispanic 621.69502
## per_smokers 4908.72589
## per_physically_inactive 1696.87366
## per_excessive_drinking 1271.77056
## per_flu_vaccinated 368.49241
## per_completed_hs 871.78751
## per_some_college 735.83487
## traffic_volume 374.39356
## per_broadband_access 719.51820
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_life_expectancy_all_counties, type = 2)
# Random forest with per_low_birthweight as the response and the same 28
# predictors, fit on the NA-filtered table of all counties; the model summary
# is printed afterwards.
rf_per_low_birthweight_all_counties <- randomForest(
  per_low_birthweight ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)
print(rf_per_low_birthweight_all_counties)
##
## Call:
## randomForest(formula = per_low_birthweight ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 1.398634
## % Var explained: 66.16
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_per_low_birthweight_all_counties, type = 2))
## IncNodePurity
## per_rural 174.91256
## median_household_income 710.51930
## population 295.68525
## per_access_to_exercise 176.45653
## per_uninsured 162.40290
## primary_care_phys_quartile 53.65224
## mental_health_providers_quartile 52.00121
## per_child_poverty 1458.75772
## air_pollution_avg_daily_pm2.5 354.02545
## drinking_water_violation_quartile 38.12946
## per_severe_house_cost_burden 259.97561
## per_severe_housing_problems 138.93731
## per_food_insecure 785.07983
## sum_4_cumulative 420.10664
## per_limited_access_healthy_food 244.05797
## per_black 3560.69833
## per_asian 128.71120
## per_am_indian_alaska_native 347.62696
## per_nativeHA_other_pacific_isl 73.97515
## per_hispanic 283.76810
## per_smokers 178.51290
## per_physically_inactive 228.90655
## per_excessive_drinking 757.05533
## per_flu_vaccinated 156.87790
## per_completed_hs 389.79011
## per_some_college 193.33238
## traffic_volume 180.35702
## per_broadband_access 268.77203
# Plot the type = 2 (node-impurity) variable importance for this model.
varImpPlot(rf_per_low_birthweight_all_counties, type = 2)
# Random forest with per_freq_mental_distress as the response and the same 28
# predictors, fit on the NA-filtered table of all counties; the model summary
# is printed afterwards.
rf_per_freq_mental_distress_all_counties <- randomForest(
  per_freq_mental_distress ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)
print(rf_per_freq_mental_distress_all_counties)
##
## Call:
## randomForest(formula = per_freq_mental_distress ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 0.4737342
## % Var explained: 91.44
# type = 2 selects the node-impurity importance (the IncNodePurity column).
print(importance(rf_per_freq_mental_distress_all_counties, type = 2))
## IncNodePurity
## per_rural 90.16618
## median_household_income 1534.99764
## population 139.70721
## per_access_to_exercise 97.17397
## per_uninsured 102.00067
## primary_care_phys_quartile 23.26137
## mental_health_providers_quartile 23.92665
## per_child_poverty 1129.56584
## air_pollution_avg_daily_pm2.5 347.75955
## drinking_water_violation_quartile 23.16687
## per_severe_house_cost_burden 63.30955
## per_severe_housing_problems 84.64563
## per_food_insecure 2713.47027
## sum_4_cumulative 276.00176
## per_limited_access_healthy_food 63.28291
## per_black 124.30758
## per_asian 153.00939
## per_am_indian_alaska_native 149.06624
## per_nativeHA_other_pacific_isl 26.14238
## per_hispanic 367.08431
## per_smokers 5598.03309
## per_physically_inactive 187.81551
## per_excessive_drinking 1118.13368
## per_flu_vaccinated 91.84770
## per_completed_hs 578.54187
## per_some_college 979.44020
## traffic_volume 79.87425
## per_broadband_access 209.08999
# Plot the same type = 2 importance measure for the all-counties
# mental-distress forest.
varImpPlot(
  rf_per_freq_mental_distress_all_counties,
  type = 2
)
# Random forest with per_freq_physical_distress as the response and the same
# 28 predictors, fitted on the all-counties table built earlier in the file.
rf_per_freq_physical_distress_all_counties <- randomForest(
  per_freq_physical_distress ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)
# Show the fit summary for this model.
print(rf_per_freq_physical_distress_all_counties)
##
## Call:
## randomForest(formula = per_freq_physical_distress ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 0.5217201
## % Var explained: 92.28
# Print the type = 2 importance table (IncNodePurity) for every predictor.
print(
  importance(rf_per_freq_physical_distress_all_counties, type = 2)
)
## IncNodePurity
## per_rural 71.54952
## median_household_income 2452.71092
## population 170.46982
## per_access_to_exercise 89.78543
## per_uninsured 125.38818
## primary_care_phys_quartile 27.10406
## mental_health_providers_quartile 27.98624
## per_child_poverty 2384.82060
## air_pollution_avg_daily_pm2.5 148.33514
## drinking_water_violation_quartile 18.19555
## per_severe_house_cost_burden 66.03350
## per_severe_housing_problems 119.95761
## per_food_insecure 5058.24880
## sum_4_cumulative 182.82119
## per_limited_access_healthy_food 74.68558
## per_black 109.75135
## per_asian 86.42335
## per_am_indian_alaska_native 154.12568
## per_nativeHA_other_pacific_isl 44.29480
## per_hispanic 159.34963
## per_smokers 3751.47080
## per_physically_inactive 134.77968
## per_excessive_drinking 999.76813
## per_flu_vaccinated 118.78978
## per_completed_hs 1492.20049
## per_some_college 1494.20414
## traffic_volume 103.30618
## per_broadband_access 386.15283
# Plot the type = 2 importance measure for the all-counties
# physical-distress forest.
varImpPlot(
  rf_per_freq_physical_distress_all_counties,
  type = 2
)
# Random forest with per_adult_obesity as the response and the same 28
# predictors, fitted on the all-counties table built earlier in the file.
rf_per_adult_obesity_all_counties <- randomForest(
  per_adult_obesity ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties,
  importance = TRUE
)
# Show the fit summary for this model.
print(rf_per_adult_obesity_all_counties)
##
## Call:
## randomForest(formula = per_adult_obesity ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_health_all_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 16.59649
## % Var explained: 52.06
# Print the type = 2 importance table (IncNodePurity) for every predictor.
print(
  importance(rf_per_adult_obesity_all_counties, type = 2)
)
## IncNodePurity
## per_rural 2184.3570
## median_household_income 3512.3292
## population 4714.0214
## per_access_to_exercise 5014.1856
## per_uninsured 2468.4934
## primary_care_phys_quartile 968.5378
## mental_health_providers_quartile 666.9165
## per_child_poverty 2479.9364
## air_pollution_avg_daily_pm2.5 2898.8951
## drinking_water_violation_quartile 448.8939
## per_severe_house_cost_burden 2142.9560
## per_severe_housing_problems 1895.6377
## per_food_insecure 2735.0840
## sum_4_cumulative 4755.1602
## per_limited_access_healthy_food 1868.3310
## per_black 6966.6975
## per_asian 2030.6819
## per_am_indian_alaska_native 2427.4708
## per_nativeHA_other_pacific_isl 623.8955
## per_hispanic 3164.6175
## per_smokers 8583.4853
## per_physically_inactive 20665.8010
## per_excessive_drinking 3711.5668
## per_flu_vaccinated 2156.4085
## per_completed_hs 1879.5275
## per_some_college 2890.1156
## traffic_volume 2382.2113
## per_broadband_access 4231.7795
# Plot the type = 2 importance measure for the all-counties obesity forest.
varImpPlot(
  rf_per_adult_obesity_all_counties,
  type = 2
)
Top 500 Counties
# Keep the counties whose cumulative pressure exceeds a fixed cutoff.
# NOTE(review): 0.002725337 is hard-coded; presumably it is the value that
# separates the 500 highest-pressure counties in the current data. Confirm and
# recompute it if the input file changes.
sum_4_normalized_pressures_health_selective_top_counties <-
  sum_4_normalized_pressures_health_selective %>%
  filter(sum_4_cumulative > 0.002725337)

# Drop rows with a missing value in any modelled column, so that all six
# top-county forests are fitted on the same complete-case sample.
# The previous list named per_freq_mental_distress and per_access_to_exercise
# twice and omitted per_freq_physical_distress, which is itself a model
# response, so missing values in that column were never removed.
sum_4_normalized_pressures_health_selective_no_na_top_counties <-
  sum_4_normalized_pressures_health_selective_top_counties %>%
  drop_na(
    # responses of the six models
    per_fair_poor_health, life_expectancy, per_low_birthweight,
    per_freq_mental_distress, per_freq_physical_distress, per_adult_obesity,
    # the 28 predictors shared by every model
    per_rural, median_household_income, population,
    per_access_to_exercise, per_uninsured, primary_care_phys_quartile,
    mental_health_providers_quartile, per_child_poverty,
    air_pollution_avg_daily_pm2.5, drinking_water_violation_quartile,
    per_severe_house_cost_burden, per_severe_housing_problems,
    per_food_insecure, sum_4_cumulative, per_limited_access_healthy_food,
    per_black, per_asian, per_am_indian_alaska_native,
    per_nativeHA_other_pacific_isl, per_hispanic, per_smokers,
    per_physically_inactive, per_excessive_drinking, per_flu_vaccinated,
    per_completed_hs, per_some_college, traffic_volume,
    per_broadband_access
  )
# Random forest with per_fair_poor_health as the response and the 28
# predictors below, fitted on the NA-filtered top-counties table.
rf_per_fair_poor_health_top_counties <- randomForest(
  per_fair_poor_health ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)
# Show the fit summary (call, forest type, residual error, % variance explained).
print(rf_per_fair_poor_health_top_counties)
##
## Call:
## randomForest(formula = per_fair_poor_health ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 1.193045
## % Var explained: 95.2
# Print the type = 2 importance table (IncNodePurity) for every predictor.
print(
  importance(rf_per_fair_poor_health_top_counties, type = 2)
)
## IncNodePurity
## per_rural 34.050826
## median_household_income 672.573842
## population 54.834333
## per_access_to_exercise 35.297699
## per_uninsured 104.263849
## primary_care_phys_quartile 10.410936
## mental_health_providers_quartile 15.472407
## per_child_poverty 1550.614364
## air_pollution_avg_daily_pm2.5 147.024739
## drinking_water_violation_quartile 6.653721
## per_severe_house_cost_burden 37.313590
## per_severe_housing_problems 55.632084
## per_food_insecure 2302.847218
## sum_4_cumulative 38.090259
## per_limited_access_healthy_food 37.971868
## per_black 395.352398
## per_asian 23.384750
## per_am_indian_alaska_native 46.249213
## per_nativeHA_other_pacific_isl 11.313782
## per_hispanic 254.484289
## per_smokers 712.203458
## per_physically_inactive 82.779413
## per_excessive_drinking 1903.806991
## per_flu_vaccinated 55.041212
## per_completed_hs 2435.526005
## per_some_college 995.831350
## traffic_volume 33.993670
## per_broadband_access 168.719342
# Plot the type = 2 importance measure for the top-counties
# fair/poor-health forest.
varImpPlot(
  rf_per_fair_poor_health_top_counties,
  type = 2
)
# Random forest with life_expectancy as the response and the same 28
# predictors, fitted on the NA-filtered top-counties table.
rf_life_expectancy_top_counties <- randomForest(
  life_expectancy ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)
# Show the fit summary for this model.
print(rf_life_expectancy_top_counties)
##
## Call:
## randomForest(formula = life_expectancy ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 2.068896
## % Var explained: 69.93
# Print the type = 2 importance table (IncNodePurity) for every predictor.
print(
  importance(rf_life_expectancy_top_counties, type = 2)
)
## IncNodePurity
## per_rural 31.937227
## median_household_income 298.739221
## population 55.585861
## per_access_to_exercise 41.780710
## per_uninsured 31.020836
## primary_care_phys_quartile 11.577034
## mental_health_providers_quartile 12.221595
## per_child_poverty 362.255456
## air_pollution_avg_daily_pm2.5 83.536677
## drinking_water_violation_quartile 9.626732
## per_severe_house_cost_burden 32.851368
## per_severe_housing_problems 39.595218
## per_food_insecure 619.646898
## sum_4_cumulative 44.943352
## per_limited_access_healthy_food 40.785632
## per_black 129.055539
## per_asian 43.152987
## per_am_indian_alaska_native 46.276526
## per_nativeHA_other_pacific_isl 12.038194
## per_hispanic 52.550449
## per_smokers 750.373242
## per_physically_inactive 81.700387
## per_excessive_drinking 184.495571
## per_flu_vaccinated 59.614529
## per_completed_hs 69.248357
## per_some_college 78.649727
## traffic_volume 40.244411
## per_broadband_access 81.271277
# Plot the type = 2 importance measure for the top-counties
# life-expectancy forest.
varImpPlot(
  rf_life_expectancy_top_counties,
  type = 2
)
# Random forest with per_low_birthweight as the response and the same 28
# predictors, fitted on the NA-filtered top-counties table.
rf_per_low_birthweight_top_counties <- randomForest(
  per_low_birthweight ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)
# Show the fit summary for this model.
print(rf_per_low_birthweight_top_counties)
##
## Call:
## randomForest(formula = per_low_birthweight ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 1.114922
## % Var explained: 71.91
# Print the type = 2 importance table (IncNodePurity) for every predictor.
print(
  importance(rf_per_low_birthweight_top_counties, type = 2)
)
## IncNodePurity
## per_rural 26.761538
## median_household_income 189.406899
## population 40.036708
## per_access_to_exercise 26.426263
## per_uninsured 14.611411
## primary_care_phys_quartile 7.270707
## mental_health_providers_quartile 6.079363
## per_child_poverty 358.728333
## air_pollution_avg_daily_pm2.5 58.344957
## drinking_water_violation_quartile 5.142533
## per_severe_house_cost_burden 27.778285
## per_severe_housing_problems 19.466638
## per_food_insecure 223.236235
## sum_4_cumulative 29.366792
## per_limited_access_healthy_food 20.767294
## per_black 421.531746
## per_asian 17.717896
## per_am_indian_alaska_native 26.029840
## per_nativeHA_other_pacific_isl 7.151121
## per_hispanic 24.572842
## per_smokers 68.019324
## per_physically_inactive 28.911768
## per_excessive_drinking 121.640604
## per_flu_vaccinated 21.397055
## per_completed_hs 19.164541
## per_some_college 25.379371
## traffic_volume 29.353459
## per_broadband_access 64.327294
# Plot the type = 2 importance measure for the top-counties
# low-birthweight forest.
varImpPlot(
  rf_per_low_birthweight_top_counties,
  type = 2
)
# Random forest with per_freq_mental_distress as the response and the same 28
# predictors, fitted on the NA-filtered top-counties table.
rf_per_freq_mental_distress_top_counties <- randomForest(
  per_freq_mental_distress ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)
# Show the fit summary for this model.
print(rf_per_freq_mental_distress_top_counties)
##
## Call:
## randomForest(formula = per_freq_mental_distress ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 0.4369113
## % Var explained: 91.76
# Print the type = 2 importance table (IncNodePurity) for every predictor.
print(
  importance(rf_per_freq_mental_distress_top_counties, type = 2)
)
## IncNodePurity
## per_rural 9.805005
## median_household_income 91.055267
## population 14.696418
## per_access_to_exercise 17.389718
## per_uninsured 22.939374
## primary_care_phys_quartile 2.665413
## mental_health_providers_quartile 3.063052
## per_child_poverty 146.304671
## air_pollution_avg_daily_pm2.5 247.526871
## drinking_water_violation_quartile 3.258657
## per_severe_house_cost_burden 6.210033
## per_severe_housing_problems 9.754670
## per_food_insecure 277.928860
## sum_4_cumulative 17.488185
## per_limited_access_healthy_food 8.578817
## per_black 31.744099
## per_asian 13.307164
## per_am_indian_alaska_native 18.327144
## per_nativeHA_other_pacific_isl 4.242416
## per_hispanic 21.665807
## per_smokers 837.002462
## per_physically_inactive 36.594622
## per_excessive_drinking 522.379759
## per_flu_vaccinated 14.224086
## per_completed_hs 59.844783
## per_some_college 123.659869
## traffic_volume 11.231708
## per_broadband_access 34.528963
# Plot the type = 2 importance measure for the top-counties
# mental-distress forest.
varImpPlot(
  rf_per_freq_mental_distress_top_counties,
  type = 2
)
# Random forest with per_freq_physical_distress as the response and the same
# 28 predictors, fitted on the NA-filtered top-counties table.
rf_per_freq_physical_distress_top_counties <- randomForest(
  per_freq_physical_distress ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)
# Show the fit summary for this model.
print(rf_per_freq_physical_distress_top_counties)
##
## Call:
## randomForest(formula = per_freq_physical_distress ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 0.4535493
## % Var explained: 92.83
# Print the type = 2 importance table (IncNodePurity) for every predictor.
print(
  importance(rf_per_freq_physical_distress_top_counties, type = 2)
)
## IncNodePurity
## per_rural 10.343930
## median_household_income 199.171118
## population 20.342420
## per_access_to_exercise 13.349614
## per_uninsured 21.554603
## primary_care_phys_quartile 3.090773
## mental_health_providers_quartile 3.945015
## per_child_poverty 411.208032
## air_pollution_avg_daily_pm2.5 68.466269
## drinking_water_violation_quartile 3.077649
## per_severe_house_cost_burden 10.821128
## per_severe_housing_problems 16.930982
## per_food_insecure 693.779544
## sum_4_cumulative 14.645575
## per_limited_access_healthy_food 10.677688
## per_black 65.978492
## per_asian 9.302637
## per_am_indian_alaska_native 13.402594
## per_nativeHA_other_pacific_isl 4.164479
## per_hispanic 38.210654
## per_smokers 455.724776
## per_physically_inactive 21.722863
## per_excessive_drinking 495.855361
## per_flu_vaccinated 13.692134
## per_completed_hs 195.205198
## per_some_college 218.020418
## traffic_volume 14.101794
## per_broadband_access 60.961589
# Plot the type = 2 importance measure for the top-counties
# physical-distress forest.
varImpPlot(
  rf_per_freq_physical_distress_top_counties,
  type = 2
)
# Random forest with per_adult_obesity as the response and the same 28
# predictors, fitted on the NA-filtered top-counties table.
rf_per_adult_obesity_top_counties <- randomForest(
  per_adult_obesity ~
    per_rural + median_household_income + population +
    per_access_to_exercise + per_uninsured + primary_care_phys_quartile +
    mental_health_providers_quartile + per_child_poverty +
    air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile +
    per_severe_house_cost_burden + per_severe_housing_problems +
    per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food +
    per_black + per_asian + per_am_indian_alaska_native +
    per_nativeHA_other_pacific_isl + per_hispanic + per_smokers +
    per_physically_inactive + per_excessive_drinking + per_flu_vaccinated +
    per_completed_hs + per_some_college + traffic_volume +
    per_broadband_access,
  data = sum_4_normalized_pressures_health_selective_no_na_top_counties,
  importance = TRUE
)
# Show the fit summary for this model.
print(rf_per_adult_obesity_top_counties)
##
## Call:
## randomForest(formula = per_adult_obesity ~ per_rural + median_household_income + population + per_access_to_exercise + per_uninsured + primary_care_phys_quartile + mental_health_providers_quartile + per_child_poverty + air_pollution_avg_daily_pm2.5 + drinking_water_violation_quartile + per_severe_house_cost_burden + per_severe_housing_problems + per_food_insecure + sum_4_cumulative + per_limited_access_healthy_food + per_black + per_asian + per_am_indian_alaska_native + per_nativeHA_other_pacific_isl + per_hispanic + per_smokers + per_physically_inactive + per_excessive_drinking + per_flu_vaccinated + per_completed_hs + per_some_college + traffic_volume + per_broadband_access, data = sum_4_normalized_pressures_health_selective_no_na_top_counties, importance = TRUE)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 9
##
## Mean of squared residuals: 14.45692
## % Var explained: 34.68
# Print the type = 2 importance table (IncNodePurity) for every predictor.
print(
  importance(rf_per_adult_obesity_top_counties, type = 2)
)
## IncNodePurity
## per_rural 278.05949
## median_household_income 655.68342
## population 446.01746
## per_access_to_exercise 304.42289
## per_uninsured 352.06937
## primary_care_phys_quartile 130.37039
## mental_health_providers_quartile 144.88528
## per_child_poverty 572.30404
## air_pollution_avg_daily_pm2.5 406.12721
## drinking_water_violation_quartile 48.83128
## per_severe_house_cost_burden 198.77955
## per_severe_housing_problems 177.27064
## per_food_insecure 603.60036
## sum_4_cumulative 363.91150
## per_limited_access_healthy_food 224.50740
## per_black 818.31700
## per_asian 245.11813
## per_am_indian_alaska_native 213.50143
## per_nativeHA_other_pacific_isl 83.95901
## per_hispanic 565.44308
## per_smokers 296.27611
## per_physically_inactive 1365.27588
## per_excessive_drinking 395.70774
## per_flu_vaccinated 371.47379
## per_completed_hs 222.68117
## per_some_college 329.73818
## traffic_volume 352.36274
## per_broadband_access 360.69014
# Plot the type = 2 importance measure for the top-counties obesity forest.
varImpPlot(
  rf_per_adult_obesity_top_counties,
  type = 2
)